<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
  <title>Gemini API Image Mask Visualization</title>
  <script type="module">
    import { GoogleGenAI } from 'https://cdn.jsdelivr.net/npm/@google/genai@latest/+esm';
    import { marked }     from 'https://esm.run/marked';

    /**
     * Return the Gemini API key, asking the user for it when none is stored.
     *
     * The key is looked up under "GEMINI_API_KEY" in localStorage. When no
     * usable value is stored, the user is asked via prompt() and a non-blank
     * answer is saved for later visits.
     *
     * @returns {string|null} The key with surrounding whitespace removed, an
     *   empty string when only whitespace was entered, or null when the prompt
     *   was dismissed.
     */
    function getApiKey() {
      const STORAGE_KEY = "GEMINI_API_KEY";

      const stored = localStorage.getItem(STORAGE_KEY);
      if (stored && stored.trim()) {
        return stored.trim();
      }

      const entered = prompt("Please enter your Gemini API key:");
      if (typeof entered !== "string") {
        // Prompt was cancelled (or dialogs are suppressed): nothing to store.
        return null;
      }

      // Pasted keys often carry stray spaces; a blank answer must never be
      // persisted, otherwise the user would not be asked again.
      const apiKey = entered.trim();
      if (apiKey) {
        localStorage.setItem(STORAGE_KEY, apiKey);
      }
      return apiKey;
    }

    // Shared GenAI client, created once when the module loads. The key is
    // resolved first (which may prompt the user) and then handed to the SDK.
    // NOTE(review): getApiKey() can return null when the prompt is dismissed;
    // confirm how GoogleGenAI behaves when constructed without a key.
    const geminiApiKey = getApiKey();
    const ai = new GoogleGenAI({ apiKey: geminiApiKey });

    /**
     * Downscale an image so it is at most `maxWidth` pixels wide (keeping the
     * aspect ratio) and re-encode it as a JPEG.
     *
     * @param {Blob} file  Image file to read.
     * @param {number} [maxWidth=1000]  Widest allowed output, in pixels.
     *   Narrower images keep their size.
     * @param {number} [quality=0.7]  JPEG quality passed to canvas.toBlob.
     * @returns {Promise<File>} Resolves with a File named 'compressed.jpg' of
     *   type 'image/jpeg'. Rejects when the file cannot be read, decoded as an
     *   image, or encoded.
     */
    async function resizeAndCompressImage(file, maxWidth = 1000, quality = 0.7) {
      return new Promise((resolve, reject) => {
        const reader = new FileReader();

        // Without these handlers a failed read/decode would leave the promise
        // pending forever.
        reader.onerror = function() {
          reject(reader.error || new Error('Failed to read the image file.'));
        };

        reader.onload = function(event) {
          const img = new Image();

          img.onerror = function() {
            reject(new Error('The selected file could not be decoded as an image.'));
          };

          img.onload = function() {
            let width = img.width;
            let height = img.height;
            if (width > maxWidth) {
              // Keep at least one pixel row so extremely wide images still
              // produce a valid canvas.
              height = Math.max(1, Math.round((height * maxWidth) / width));
              width = maxWidth;
            }

            const canvas = document.createElement('canvas');
            canvas.width = width;
            canvas.height = height;
            canvas.getContext('2d').drawImage(img, 0, 0, width, height);

            canvas.toBlob(function(blob) {
              // toBlob hands back null when the image cannot be created.
              if (!blob) {
                reject(new Error('The browser could not encode the image as JPEG.'));
                return;
              }
              resolve(new File([blob], 'compressed.jpg', { type: 'image/jpeg' }));
            }, 'image/jpeg', quality);
          };

          img.src = event.target.result;
        };

        reader.readAsDataURL(file);
      });
    }

    /**
     * Click handler for the "Process" button.
     *
     * Sends the prompt (plus the selected image, compressed and inlined as
     * base64, when one is chosen) to the selected model, renders the reply as
     * Markdown in #result, shows token usage in #tokenUsage and, when an image
     * was supplied, overlays any boxes/masks found in the reply.
     *
     * Failures (file read, network, SDK) are reported in #result instead of
     * leaving the "Processing…" placeholder on screen.
     *
     * @returns {Promise<void>}
     */
    async function processImageAndPrompt() {
      const fileInput   = document.getElementById('imageInput');
      const promptInput = document.getElementById('promptInput');
      const modelSelect = document.getElementById('modelSelect');
      const resultDiv   = document.getElementById('result');
      const usageDiv    = document.getElementById('tokenUsage');

      if (!promptInput.value.trim()) {
        alert('Please enter a prompt.');
        return;
      }

      // Snapshot the chosen file now: the input may be cleared or changed
      // while the request is in flight.
      const imageFile = (fileInput.files && fileInput.files[0]) ? fileInput.files[0] : null;

      resultDiv.textContent = 'Processing…';
      usageDiv.textContent = '';

      try {
        // Build the contents array: prompt text first, then the optional image.
        const contents = [ promptInput.value ];

        if (imageFile) {
          const compressed = await resizeAndCompressImage(imageFile);
          const part = await new Promise((resolve, reject) => {
            const reader = new FileReader();
            reader.onerror = function() {
              reject(reader.error || new Error('Failed to read the compressed image.'));
            };
            reader.onload = function() {
              resolve({
                inlineData: {
                  // Drop the "data:<mime>;base64," prefix of the data URL.
                  data: reader.result.split(',')[1],
                  mimeType: compressed.type
                }
              });
            };
            reader.readAsDataURL(compressed);
          });
          contents.push(part);
        }

        // An option marked data-non-thinking="true" requests a zero thinking budget.
        const selectedOpt   = modelSelect.selectedOptions[0];
        const isNonThinking = selectedOpt.dataset.nonThinking === 'true';

        const req = {
          model:    selectedOpt.value,
          contents: contents
        };
        if (isNonThinking) {
          req.config = {
            thinkingConfig: { thinkingBudget: 0 }
          };
        }

        // Call the GenAI SDK
        const response = await ai.models.generateContent(req);
        const text = (typeof response.text === 'string') ? response.text : '';

        if (text) {
          // NOTE(review): model output is inserted as HTML without sanitizing.
          // Treat it as untrusted and consider running it through an HTML
          // sanitizer before assigning to innerHTML.
          resultDiv.innerHTML = marked.parse(text);
        } else {
          resultDiv.textContent = 'The model returned no text.';
        }

        // Display token usage; individual counts can be missing from the metadata.
        const usage = response.usageMetadata;
        if (usage) {
          const inTokens  = Number(usage.promptTokenCount) || 0;
          const outTokens = Number(usage.candidatesTokenCount) || 0;
          usageDiv.textContent =
            `Input tokens: ${inTokens.toLocaleString()}  •  Output tokens: ${outTokens.toLocaleString()}`;
        }

        // If an image was provided, overlay boxes & masks
        if (imageFile && text) {
          const boxes = extractBoxAndMask(text);
          if (boxes.length) {
            displayImageWithBoundingBoxesAndMasks(imageFile, boxes);
          }
        }
      } catch (err) {
        console.error('Request failed:', err);
        const reason = (err && err.message) ? err.message : String(err);
        resultDiv.textContent = 'Error: ' + reason;
      }
    }

    /**
     * Pull every `"box_2d": [a, b, c, d], "mask": "data:image/png;base64,…"`
     * pair out of the model's reply text.
     *
     * The text is scanned with a regular expression rather than parsed as
     * JSON, so pairs are found even when the reply wraps the JSON in other
     * text. Only entries where "mask" directly follows "box_2d" are matched.
     *
     * @param {string} text  Raw reply text.
     * @returns {Array<{box: number[], mask: string}>} One entry per match, in
     *   order of appearance; `box` holds the four integers as written and
     *   `mask` the PNG data URL. Empty when nothing matches or `text` is not
     *   a string.
     */
    function extractBoxAndMask(text) {
      const results = [];
      if (typeof text !== 'string') {
        return results;
      }

      // Groups 1-4: the four box integers; group 5: the mask data URL.
      // Capturing the numbers here avoids re-scanning a fixed-size window of
      // the text, which could miss boxes written with generous whitespace.
      const regex = /"box_2d"\s*:\s*\[\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)\s*\]\s*,\s*"mask"\s*:\s*"(data:image\/png;base64,[^"]+)"/g;

      let match;
      while ((match = regex.exec(text)) !== null) {
        results.push({
          box:  [match[1], match[2], match[3], match[4]].map(Number),
          mask: match[5]
        });
      }
      return results;
    }

    /**
     * Draw the image on #canvas with a 0–1000 reference grid and axes, overlay
     * every detected bounding box and its mask, and add one preview row per
     * detection (cropped region next to its mask) to #boundingBoxImages.
     *
     * @param {Blob} file  Image to draw; it is read as a data URL.
     * @param {Array<{box: number[], mask: string}>} boxAndMaskData
     *   Detections. `box` is [ymin, xmin, ymax, xmax]; when any value exceeds
     *   1000 the box is treated as pixel coordinates, otherwise as 0–1000
     *   normalized coordinates. `mask` is an image URL stretched over the box.
     *   Boxes with no area or non-numeric values are skipped.
     */
    function displayImageWithBoundingBoxesAndMasks(file, boxAndMaskData) {
      // The image is inset inside the canvas so the axis labels fit around it.
      const LEFT = 80;
      const TOP = 20;
      const PADDING = 100;
      const SCALE_MAX = 1000;
      const SCALE_STEP = 100;
      const PREVIEW_HEIGHT = 200;
      const COLORS = ['#FF0000','#00FF00','#0000FF','#FFFF00','#FF00FF','#00FFFF'];

      // Stroke one straight segment with the context's current stroke settings.
      function strokeLine(ctx, x1, y1, x2, y2) {
        ctx.beginPath();
        ctx.moveTo(x1, y1);
        ctx.lineTo(x2, y2);
        ctx.stroke();
      }

      // Convert a [ymin, xmin, ymax, xmax] box to an image-pixel rectangle, or
      // null when it has no positive area (zero-size or non-numeric input).
      function boxToPixelRect(box, imgW, imgH) {
        const [ymin, xmin, ymax, xmax] = box;
        const isPixels =
          ymin > SCALE_MAX || xmin > SCALE_MAX || ymax > SCALE_MAX || xmax > SCALE_MAX;
        const yDivisor = isPixels ? imgH : SCALE_MAX;
        const xDivisor = isPixels ? imgW : SCALE_MAX;

        // Order the corners so width/height are positive even when the two
        // corners arrive swapped.
        const x = (Math.min(xmin, xmax) / xDivisor) * imgW;
        const y = (Math.min(ymin, ymax) / yDivisor) * imgH;
        const w = (Math.abs(xmax - xmin) / xDivisor) * imgW;
        const h = (Math.abs(ymax - ymin) / yDivisor) * imgH;

        // The comparison is false for NaN, so malformed boxes are rejected too.
        return (w > 0 && h > 0) ? { x, y, w, h } : null;
      }

      // Semi-transparent red reference grid, one line every SCALE_STEP units.
      function drawGrid(ctx, imgW, imgH) {
        ctx.strokeStyle = 'rgba(255, 0, 0, 0.5)';
        ctx.lineWidth = 1;
        for (let i = 0; i <= SCALE_MAX; i += SCALE_STEP) {
          const x = LEFT + (i / SCALE_MAX) * imgW;
          const y = TOP + (i / SCALE_MAX) * imgH;
          strokeLine(ctx, x, TOP, x, TOP + imgH);
          strokeLine(ctx, LEFT, y, LEFT + imgW, y);
        }
      }

      // Black left/bottom axes with a tick and numeric label every SCALE_STEP units.
      function drawAxes(ctx, imgW, imgH) {
        const bottom = TOP + imgH;
        ctx.strokeStyle = '#000';
        ctx.lineWidth = 1;
        ctx.font = '26px Arial';

        ctx.textAlign = 'right';
        strokeLine(ctx, LEFT, TOP, LEFT, bottom);
        for (let i = 0; i <= SCALE_MAX; i += SCALE_STEP) {
          const y = TOP + (i / SCALE_MAX) * imgH;
          ctx.fillText(i.toString(), LEFT - 5, y + 5);
          strokeLine(ctx, LEFT - 5, y, LEFT, y);
        }

        ctx.textAlign = 'center';
        strokeLine(ctx, LEFT, bottom, LEFT + imgW, bottom);
        for (let i = 0; i <= SCALE_MAX; i += SCALE_STEP) {
          const x = LEFT + (i / SCALE_MAX) * imgW;
          ctx.fillText(i.toString(), x, bottom + 20);
          strokeLine(ctx, x, bottom, x, bottom + 5);
        }
      }

      // Fixed-height column used for both halves of a preview row.
      function makePreviewColumn(widthPx) {
        const col = document.createElement('div');
        col.style.flex = '0 0 auto';
        col.style.width = widthPx + 'px';
        col.style.height = PREVIEW_HEIGHT + 'px';
        col.style.overflow = 'hidden';
        return col;
      }

      // Image element that fills its preview column.
      function makeFillImage(src, altText) {
        const el = document.createElement('img');
        el.src = src;
        el.alt = altText;
        Object.assign(el.style, {
          width:'100%',height:'100%',objectFit:'fill'
        });
        return el;
      }

      // Build the "cropped region | mask" comparison row for one detection.
      function buildPreviewRow(image, rect, data, color, index) {
        const scaledW = Math.round(rect.w * (PREVIEW_HEIGHT / rect.h));

        const row = document.createElement('div');
        row.className = 'bounding-box-container';

        const label = document.createElement('p');
        label.textContent = `Box with Mask: [${data.box.join(', ')}]`;
        row.appendChild(label);

        const compare = document.createElement('div');
        compare.className = 'image-comparison';
        compare.style.gap = '20px';

        // Left: the region of the source image covered by the box.
        const cropCanvas = document.createElement('canvas');
        cropCanvas.width  = Math.max(1, Math.round(rect.w));
        cropCanvas.height = Math.max(1, Math.round(rect.h));
        cropCanvas.getContext('2d').drawImage(
          image,
          rect.x, rect.y, rect.w, rect.h,
          0, 0, cropCanvas.width, cropCanvas.height
        );
        const leftCol = makePreviewColumn(scaledW);
        leftCol.appendChild(
          makeFillImage(cropCanvas.toDataURL('image/jpeg'), `Cropped region for box ${index + 1}`)
        );

        // Right: the mask, framed in the same color as its box on the canvas.
        const rightCol = makePreviewColumn(scaledW);
        rightCol.style.border = '2px solid ' + color;
        rightCol.appendChild(makeFillImage(data.mask, `Mask for box ${index + 1}`));

        compare.appendChild(leftCol);
        compare.appendChild(rightCol);
        row.appendChild(compare);
        return row;
      }

      const reader = new FileReader();
      reader.onerror = function() {
        console.error('Failed to read the image for the overlay:', reader.error);
      };
      reader.onload = function(event) {
        const image = new Image();
        image.onerror = function() {
          console.error('Failed to decode the image for the overlay.');
        };
        image.onload = function() {
          const canvas = document.getElementById('canvas');
          canvas.width  = image.width + PADDING;
          canvas.height = image.height + PADDING;
          const ctx = canvas.getContext('2d');

          // Draw order matters: image, grid, boxes, then axes on top. Masks are
          // painted later, as each mask image finishes loading.
          ctx.drawImage(image, LEFT, TOP);
          drawGrid(ctx, image.width, image.height);

          const boundingBoxImages = document.getElementById('boundingBoxImages');
          boundingBoxImages.innerHTML = '';

          (boxAndMaskData || []).forEach(function(data, index) {
            const rect = boxToPixelRect(data.box, image.width, image.height);
            if (!rect) return;
            const color = COLORS[index % COLORS.length];

            // Overlay box
            ctx.strokeStyle = color;
            ctx.lineWidth   = 5;
            ctx.strokeRect(LEFT + rect.x, TOP + rect.y, rect.w, rect.h);

            // Overlay mask, stretched to the box
            const maskImg = new Image();
            maskImg.onload = function() {
              ctx.save();
              ctx.globalAlpha = 0.7;
              ctx.drawImage(maskImg, LEFT + rect.x, TOP + rect.y, rect.w, rect.h);
              ctx.restore();
            };
            maskImg.onerror = function() {
              console.error('Failed to load the mask image for box', data.box);
            };
            maskImg.src = data.mask;

            boundingBoxImages.appendChild(buildPreviewRow(image, rect, data, color, index));
          });

          drawAxes(ctx, image.width, image.height);
        };
        image.src = event.target.result;
      };
      reader.readAsDataURL(file);
    }

    /**
     * Reset everything tied to the current image: empty the file input, wipe
     * the canvas pixels, hide the preview, and clear the per-box previews and
     * the token-usage line.
     */
    function clearImage() {
      const byId = (id) => document.getElementById(id);

      byId('imageInput').value = '';

      const surface = byId('canvas');
      surface.getContext('2d').clearRect(0, 0, surface.width, surface.height);

      const preview = byId('imagePreview');
      preview.src = '';
      preview.style.display = 'none';

      byId('boundingBoxImages').innerHTML = '';
      byId('tokenUsage').textContent = '';
    }

    /**
     * "change" handler for the file input: show the newly chosen image in
     * #imagePreview. Does nothing when the selection is empty.
     *
     * @param {Event} event  Change event whose target is the file input.
     */
    function previewImage(event) {
      const chosen = event.target.files[0];
      if (chosen) {
        const loader = new FileReader();
        loader.onload = () => {
          const preview = document.getElementById('imagePreview');
          preview.src = loader.result;
          preview.style.display = 'block';
        };
        loader.readAsDataURL(chosen);
      }
    }

    // Wire the controls to their handlers: [element id, event name, callback].
    const uiBindings = [
      ['submitBtn',     'click',  processImageAndPrompt],
      ['clearImageBtn', 'click',  clearImage],
      ['imageInput',    'change', previewImage]
    ];
    for (const [elementId, eventName, handler] of uiBindings) {
      document.getElementById(elementId).addEventListener(eventName, handler);
    }
  </script>

  <style>
    /* Page frame: a centered, fixed-width column. */
    body {
      max-width: 800px;
      margin: 0 auto;
      padding: 20px;
      font-family: Arial, sans-serif;
    }

    /* Prompt editor spans the full column. */
    textarea {
      width: 100%;
      height: 100px;
    }

    /* Model reply: bordered panel that scrolls in both directions and keeps
       the reply's own line breaks. */
    #result {
      margin-top: 20px;
      padding: 10px;
      border: 1px solid #ccc;
      max-height: 500px;
      overflow: auto;
      white-space: pre-wrap;
    }

    /* Token counts shown under the reply. */
    #tokenUsage {
      margin-top: 8px;
      color: #555;
      font-size: 0.9em;
    }

    /* Never let the canvas, the preview or the per-box images overflow the column. */
    #canvas,
    #imagePreview {
      max-width: 100%;
    }
    #boundingBoxImages img {
      max-width: 100%;
    }

    /* One row per detected box: a label above a crop/mask pair. */
    .bounding-box-container {
      display: block;
      margin-bottom: 30px;
      padding-bottom: 20px;
      border-bottom: 1px solid #eee;
    }
    .bounding-box-container p {
      margin: 0 0 10px;
      font-weight: bold;
    }
    .image-comparison {
      display: flex;
      gap: 20px;
    }
  </style>
</head>
<body>
  <h1>Gemini API Image Mask Visualization</h1>

  <!-- Every control gets a visible label so it has an accessible name. -->
  <label for="modelSelect">Model</label>
  <select id="modelSelect">
    <option value="gemini-2.5-flash">gemini-2.5-flash (default)</option>
    <option value="gemini-2.5-pro">gemini-2.5-pro</option>
    <option value="gemini-2.5-flash-lite-preview-06-17">gemini-2.5-flash-lite-preview-06-17</option>
    <!-- data-non-thinking="true" makes the script request a zero thinking budget. -->
    <option value="gemini-2.5-flash-preview-04-17" data-non-thinking="true">
      gemini-2.5-flash-preview-04-17 (non thinking)
    </option>
    <option value="gemini-2.5-flash-preview-04-17">gemini-2.5-flash-preview-04-17</option>
    <option value="gemini-2.5-pro-exp-03-25">gemini-2.5-pro-exp-03-25</option>
  </select>
  <label for="imageInput">Image</label>
  <input type="file" id="imageInput" accept="image/*" />
  <button type="button" id="clearImageBtn">Clear Image</button>

  <div><label for="promptInput">Prompt</label></div>
  <!-- The textarea's text is sent to the model verbatim, so its content is kept unindented. -->
  <textarea id="promptInput">
Give the segmentation masks for the objects. 
Output a JSON list of segmentation masks where each entry contains the 2D bounding box in "box_2d" and the mask in "mask".
  </textarea>

  <button type="button" id="submitBtn">Process</button>

  <div id="result"></div>
  <div id="tokenUsage"></div>

  <div id="imageContainer">
    <!-- Starts hidden: it has no src until a file is chosen, and the script
         toggles this same display property when showing/clearing it. -->
    <img id="imagePreview" alt="Image preview" style="display: none" />
    <canvas id="canvas" role="img" aria-label="Image with bounding boxes and masks overlaid"></canvas>
  </div>

  <div id="boundingBoxImages"></div>
</body>
</html>
